import databricks.koalas as ks
import numpy as np
import pandas as pd
import pyspark.pandas as ps

if __name__ == '__main__':
    # Demo script: build a small pandas frame that contains missing values,
    # convert it to a pandas-on-Spark frame, then show two ways of handling
    # the nulls (dropping the affected rows vs. filling them with a constant).

    # Six consecutive daily timestamps starting at 2013-01-01.
    day_index = pd.date_range("20130101", periods=6)
    base_frame = pd.DataFrame(
        np.random.randn(6, 4),
        index=day_index,
        columns=['A', 'B', 'C', 'D'],
    )

    # Keep only the first four days and append a new column 'E'; reindex
    # creates 'E' with missing values in every row.
    wanted_columns = [*base_frame.columns, 'E']
    sparse_frame = base_frame.reindex(index=day_index[:4], columns=wanted_columns)

    # Populate 'E' for the first two days only (label slicing is inclusive),
    # which leaves the remaining rows with a null in 'E'.
    first_day, second_day = day_index[0], day_index[1]
    sparse_frame.loc[first_day:second_day, 'E'] = 1

    # Replace the timestamp index with its string representation before
    # handing the frame over to pandas-on-Spark.
    sparse_frame.index = sparse_frame.index.astype('str')
    print("pandas df", sparse_frame)

    spark_frame = ps.from_pandas(sparse_frame)
    print("koalas df", spark_frame)

    # Label below reads "drop rows with null values": discard every row that
    # has at least one null.
    without_nulls = spark_frame.dropna(how='any')
    print("删除null值行", without_nulls)

    # Label below reads "fill rows with null values": replace every null
    # with the constant 5 instead of dropping the row.
    nulls_filled = spark_frame.fillna(value=5)
    print("填充null值行", nulls_filled)
